{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基础包与数据导入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6028 entries, 0 to 6027\n",
      "Data columns (total 7 columns):\n",
      "house_id        6028 non-null int64\n",
      "neighborhood    6028 non-null object\n",
      "area            6028 non-null int64\n",
      "bedrooms        6028 non-null int64\n",
      "bathrooms       6028 non-null int64\n",
      "style           6028 non-null object\n",
      "price           6028 non-null int64\n",
      "dtypes: int64(5), object(2)\n",
      "memory usage: 329.7+ KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>house_id</th>\n",
       "      <th>neighborhood</th>\n",
       "      <th>area</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>style</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1112</td>\n",
       "      <td>B</td>\n",
       "      <td>1188</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>ranch</td>\n",
       "      <td>598291</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>491</td>\n",
       "      <td>B</td>\n",
       "      <td>3512</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>victorian</td>\n",
       "      <td>1744259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5952</td>\n",
       "      <td>B</td>\n",
       "      <td>1134</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>ranch</td>\n",
       "      <td>571669</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3525</td>\n",
       "      <td>A</td>\n",
       "      <td>1940</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>ranch</td>\n",
       "      <td>493675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5108</td>\n",
       "      <td>B</td>\n",
       "      <td>2208</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>victorian</td>\n",
       "      <td>1101539</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   house_id neighborhood  area  bedrooms  bathrooms      style    price\n",
       "0      1112            B  1188         3          2      ranch   598291\n",
       "1       491            B  3512         5          3  victorian  1744259\n",
       "2      5952            B  1134         3          2      ranch   571669\n",
       "3      3525            A  1940         4          2      ranch   493675\n",
       "4      5108            B  2208         6          4  victorian  1101539"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the listings from a relative path (resolved against the kernel's working directory).\n",
    "df = pd.read_csv('house_prices.csv')\n",
    "\n",
    "# info() prints the schema to stdout; the trailing head() is the value the cell displays.\n",
    "df.info()\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 变量探索\n",
    "数据质量和整洁度都还不错，毕竟是经过基础评估与清洗的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "neighborhood :\n",
      "                 B     A     C\n",
      "value_counts  2427  1876  1725\n",
      "===================================\n",
      "style :\n",
      "              victorian  ranch  lodge\n",
      "value_counts       2997   1802   1229\n",
      "===================================\n"
     ]
    }
   ],
   "source": [
    "# Categorical (nominal) variables whose level frequencies we want to inspect.\n",
    "nominal_vars = ['neighborhood', 'style']\n",
    "separator = '=' * 35\n",
    "\n",
    "for each in nominal_vars:\n",
    "    # Passing a *list* to agg yields a one-column DataFrame, so .T lays the counts\n",
    "    # out as a single row; on a plain Series (.value_counts().T) .T changes nothing.\n",
    "    level_counts = df[each].agg(['value_counts']).T\n",
    "    print(each, ':')\n",
    "    print(level_counts)\n",
    "    print(separator)\n",
    "\n",
    "# Every level has a reasonable number of rows, which prepares the ground for the ANOVA below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation heatmap\n",
    "def heatmap(data, method='pearson', camp='RdYlGn', figsize=(10, 8)):\n",
    "    \"\"\"\n",
    "    Draw an annotated correlation heatmap for the numeric columns of `data`.\n",
    "\n",
    "    data: DataFrame to analyse; non-numeric columns are left out of the\n",
    "          correlation matrix.\n",
    "    method: correlation method forwarded to DataFrame.corr (default 'pearson').\n",
    "    camp: colormap name forwarded to seaborn as `cmap` (the misspelt parameter\n",
    "          name is kept so existing callers keep working). Default 'RdYlGn'\n",
    "          (red-yellow-green); 'YlGnBu', 'Blues' and 'Greens' also work well.\n",
    "    figsize: figure size in inches, default (10, 8).\n",
    "    \"\"\"\n",
    "    # Select numeric/bool columns explicitly: pandas >= 2.0 no longer drops\n",
    "    # string columns silently in DataFrame.corr; it raises instead.\n",
    "    # Computing the matrix once also avoids the repeated corr() calls.\n",
    "    corr = data.select_dtypes(include=['number', 'bool']).corr(method=method)\n",
    "\n",
    "    # To hide the duplicated half of the symmetric matrix, build a mask and\n",
    "    # pass mask=mask to sns.heatmap below:\n",
    "    #     mask = np.zeros_like(corr)\n",
    "    #     mask[np.tril_indices_from(mask)] = True\n",
    "    plt.figure(figsize=figsize, dpi=80)\n",
    "    sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns,\n",
    "                cmap=camp, center=0, annot=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAFUCAYAAADPtPD/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAMTQAADE0B0s6tTgAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdeVxU1fvA8c/DKoggoMjmUmppqaVZmqZp30pzN8usrGzR9jIr06ysbDErK5fM6ttm9rXNcNcy66dl7mnupuaCgKKCiijr+f0xQAwjwsUZhoHn/XrdF9x7z708xxnnzHPOufeKMQallFKqMC93B6CUUqri0cZBKaWUA20clFJKOdDGQSmllANtHJRSSjnQxkEppZQDbRyUUko50MZBKaU8kIhMEJE9ImJEpNlZyj0nIrvyljGlPb82Dkop5Zm+A64C9hZXQEQ6ArcCLYCLgBtEpEtpTq6Ng1JKeSBjzFJjTHwJxW4BPjPGnDTGZACfYGssSuRzrgFaIQ+2rTL36sid8rq7Q1AuYkyuu0MoNyJV6/uj0Fmcdq5z+bz7YOWTwLBCW8YbY8aX4Uz1gP8rtL4HuKk0B5Zr46CUUqpkeQ1BWRqDM56u0O+lbvyq1tcCpZSqWvYBDQqt18/bViJtHJRSqvL6FrhLRKqLiD9wDzCjNAdq46CUUi4gXlLmpVTnF5ksIvFALLBYRHbmbZ8vIq0BjDG/At8AG4GtwI/GmIWlOb+OOSillAcyxjwMPHyG7d2KrL8MvGz1/Jo5KKWUcqCZg1JKuUBpu4cqKs0clFJKOdDMQSmlXEAzB6WUUpWOZg5KKeUCmjkopZSqdLRxUEop5UC7lZRSygVEtFtJKaVUJaOZg1JKuYAOSCullKp0tHFQSinlQLuVlFLKBbRbSSmlVKWjmYNSSrmAZg5KKaUqHc0clFLKBTRzUEopVelo46CUUspBpe1Weq//MHq16ECD8CiajbmNzQm73R3SWe3Zc5ARIz4nJSWN4BoBvD72Lho1inYoN+X9+cycuRyAHj2u4PGhvUrcN3Pmcl5/7VtiYsIBCA4O5ItpwwqOW7VqB+Pe+J7TpzPJycnltdfvomXL811W16JcWff09AzGjJnBpk17ycrM4brrLmXYk33cet+bPXsOMXLE56SknCQ4OIDXXr+TRo2iHMpNmbKAH2b+AUD3Hpfz+OM9Adi4cQ+vvfot27bF07FjM96bMLjgmPnz1/DxRz+SlZ2DIPTv356Bd3Qun4rlcdd7OSMji9Gjp7N58z4wEBtbi9deu5PQsCCX1rc4nt6tVGkbh+/WLWHcj9P47akP3R1KqYx+4Sv697+KG29sx8KFaxk1ahpff/2MXZnVq/9m3rzVzJr9PD4+Xtw64E1aXdaQDh0uPus+gCvbNWHChPsd/u7Bg6mMGPEZH330KA0bRpGRkUVGRla51DmfK+s+deoCAGbPfp7s7FweeGAyixauo+sNl5VrHQt7cbStvn1vvJJFC9fx3KgvmfH103ZlbHVaQ9ysUfj4eHHbrW9zWauGXNXhImrXDmHkszezdct+li/fZndcZGQoUz98mNq1Qzhx4hQ39RvLRRfXo1WrhuVWP3e9l7+esZT09Axmz34eEeH556bx8ceLeHp4v3Kpd2VTabuVlu1cz4HUZHeHUSpHjhxny5Z99OrVBoAuXVpxIP4I8fGH7cotmL+Gvn2vJDDQHz8/X/r1a8e8eatL3Hc2//vq/+jVsw0NG9q+ufr7+xIcHOjkGhbP1XXftu0AHTtcjIjg6+tN+/ZNmTVrZbnVr6gjR06wZct+eva6AoDru7TkwIEjHIg/YlduwYK19O3btqBON/a7knnz1gC2BqBFiwb4+Tl+t2vVqiG1a4cAUKNGAOedV8fh39KV3PleBjh9KpOsrByys3M4mZ5BnchQ51bQAvGSMi8VQakaBxHpdrbF1UFWdomJKUREhODj4w3Y
bvUbFRVKYmKKXbmExKNER4cVrMfEhpOYkFLiPoDVq/6mT+9XuHXAOBYuXFuwfdeuRE5nZHH3oHfp0/sVxoyZwalTmS6p55m4uu7Nm9dnwcK1ZGZmk5Z2msU/refAAfsP4vKUVEx9ExKP2pVLTChSp5hwEouUKcnOnYmsX/8PbdtceO6Bl5I738u3DOhIUFAA7ds9Tfv2w0k7cYqBAzu5oppVQmm7lfJz3mrA5cDGvPXmwApgvpPjqnKK9oEbU3K5omWK29e5UwtuuKE1AQF+7NqVyL33TCAyMpRLLz2frOwcVq3awaefPk716tUY9ewXTJo4p1xTcVfWffDgLox/+wf63zyW4OBAWrY8nz9WbD/nmM+JQ33PXGH7OhXzj1KMpKQUHn74A1586VYi6tS0HuM5cNd7efnyrSDw2+/jEBFGjvycyZPn8eijPc+5TmVRUTKAsipV5mCM6WyM6QzsAtobY1oaY1oC7YAtxR0nIsNEJD5/YUOic6KuZKKiQklKSiE7OwewfRAkJaUQFWWfEkdHhdl96004cISo6NAS94WGBREQ4AdAw4ZRdLy6GevW7bIdFx1Gp6ubERJSHR8fb7p1b81fG/e4rK5Fubru/v6+jHy2P3GznuOLacMIqVmdRg0dB3/LS2RUKAcd6ptKdFSYXbmo6CJ1SjhKVJEyxTl0MJV77p7Agw/cQNeurZwXfCm48708Y8Yyrrv2Uvz9ffHz86FnzytYuXKH6ypbyVkdc2hijCnosDXGrAKKffcZY8YbY2LzFy5x33/Kiiw8PJimF9Vl9mzbP+2iReuIiQknNraWXbkuXVsRF7eC9PQMMjOz+P775XTvdnmJ+w4e/DclP3z4OCtXbOOipnUB20yQlSt3kJlpG4RetmwLTS6MdXmd87m67mlppwq6yeL3H2bG/5Zy9z3Xllv9igoPr0HTpnWZM3sVAD8u+pPomDBiYsPtynXtYl+nmd//QbfuJQ+iHzp0jLvvfo/77ruePn3buqQOZ+PO93LdurX47bctGGMwxvDrLxu5oLHjLClVOmIlXRWRFcAkY8yXeesDgUeMMaV6F8qDba3lxudg0oCn6N2iI5HBYRxOO0ZaRjqNR99cXn+e3CmvWyq/e3cSI0d+TmrqSYKqV2PsG4No3DiaIYMn8uhjvWjevD4AkyfN44cfbNMbu3VvzbBhfQrOUdy+8ePjWPLzBnx8vMnNNQwY0IHbbu9UcNzHHy1i5sw/8PbxonHjaF566XZq1Ag4l+pb4sq6b926n6FDP8LH2wtvb28eebQH11/f8pziNSb3nI7/Z/dBRo78wlbfoGq8PvZOW32HTOaxR3vQLL++k+cTl1+nbq15YlhvAPbtS+bOO97h9OlMMjKyCAmpzpD7u3DbbVfz/PPTmTd3NfXrRxT8vTvu6MyN/a4sU6wi1uesuOu9nJp6khde+JJdOxMRERo2jOKll2+nZs3qpa8vnZ3WFxT80nVl/rw7Pvont/dJWW0cmgLTgIsBA2wC7jLGbC3V8eXYOLib1cZBeY5zbRw8SVkaB0+mjcO/LF3nkNcItBaRGnnrJ1wSlVJKeThPH5AuVeMgIucZY/4RkYuKbAfAGFPsoLRSSinPU9rMYSLQA5h3hn0GKL97LSillAeoEpmDMaZH3s/zzlZORNobY353RmBKKaXcx9mjTROdfD6llFJu4Owb73l2HqWUUk7i6d1Kzs4cqsxUVaWUqswq7S27lVLKnTRzsOfZ/xpKKaWAMmYOIuJjjMk+w65J5xiPUkpVClUqcxCRi0VkPfBP3vplIvJG/n5jzH+dHJ9SSik3sNqtNAl4BMh/rNM6oLtTI1JKKeV2VruVahhjfit02wwjIuX7wGGllPIAVapbCcgWEV/ypqyKSCxQdW5RqZRSVYTVzGES8ANQS0ReBO4EnnV2UEop5ek8PXOwesvuL0VkN9AbCMT2LIdlLolMKaWU21ieymqMWQ4sF5EQoK7zQ1JKKeVuVqeyLhSRmiIS
BGwA5orIy64JTSmlPJd4SZmXisDqgHQdY0wq0A2YBTQG+pz9EKWUUp7GareSb97PjsBCY0yWiOhsJaWUKiJ/yr+nspo5bBKRhdieCrdERAJdEJNSSik3s5o5DAK6AhuMMekiEgOMcHpUSinl4SrK2EFZWZ3KehqIK7R+ADjg7KCUUkq5l9XZSrkiklN0cVVwSimliicijUVkuYjsEJFVInLRGcpUE5HPRGSjiGwSkdkiUqukc1sdc6gBBOctdYCngVEWz6GUUpVeOU1lnQp8aIy5ABgHnOnO2PcDQUALY0wz4CAwvKQTW+1WOllo9SQwXkR+BcaW5vjcKa9b+XMezevBke4OodwE1QlydwjlytNnoVgREBrg7hDK1cHHOrs7hFITkQigFXB93qbvgUki0sAYs6dI8UDAN292aRCwsaTzn9OT4ESkMXqVtFJKOTiXzEFEholIfKFl2Bn+RF0gIf/Ba8YYA+wD6hUpNxU4DhzCljWEUIoHs1nKHEQkmbw7suYd6w08ZuUcSimlzs4YMx4YX5qiRdbPlNZem1cuEttdtD8DXgBePNuJrU5lbV3o92wgyRijA9JKKVWE1zn1y5TKfiA2/7HNYuvvrIsteyjsAeCLvNmmiMh0bGMOL57t5JbCN8bsxZaaRAIxgJ+V45VSSjmHMeYQ8CcwMG9TP2DPGcYbdgNdJA+2i5g3lXR+q1NZ2wG7gA+AD4GdInKllXMopZRymvuB+0VkB7YLku8FEJH5IpLf0/MitnGGzdgahVrA8yWd2Gq30njgZmPM73kBtAPeAdpaPI9SSlVq3uUwq80Ysx1w+IJujOlW6PejwE1Wz221V6xafsOQ90eXA9Ws/lGllFIVm9XMIV1ErjXGLAYQkU5AutOjUkopD+ddle6thG3a6vcikoFtapQ/tkEQpZRSlYjVK6TXiEgj4EJs82m3GWOyXBKZUkp5sPIYc3Aly8+QBnKAo3nHRokIxpii82qVUkp5MKtXSA8CJgBZ2K60A1v3UoRzw1JKKeVOVjOH54ErjDHbXBGMUkpVFt6uv0LapayGn6wNg1JKVX6lyhwKPSt6pog8AnwFnM7fb4zR6axKKVVIVRmQTsM2tpBf2wmF1g22u7MqpZSqJErVOBhjPLz3TCmlypenZw76oa+UUsqBNg5KKaUclOUiOKWUUiXw9HsraeaglFLKgWYOSinlAt6enTho5qCUUsqRNg5KKaUcaLeSUkq5gA5IK6WUqnQqdOawZ89BRoz4nJSUNIJrBPD62Lto1CjaodyU9+czc+ZyAHr0uILHh/Yqcd/Mmct5/bVviYkJByA4OJAvpg0rOG7Vqh2Me+N7Tp/OJCcnl9dev4uWLc93WV3P1Xv9h9GrRQcahEfRbMxtbE7Y7e6QnKJhWAxT+w4nPDCE1NNpPBA3ju3J9o8PCfStxtvdHqFl9IX4efswZ9vvjF78sZsitqZhWAwf9Hma8MAQjp1O44G4N9l+2L5+/t6+vNtjKJdGNUYE9qQk8dCstzh66jiCMOa6wVzbqDU+Xt6s2L+ZJ+ZOICs32001OrvzQqKYeP1QwqoFczzjJI8tfo8dR/fblfH39mVc54e4JKIhIsLeY0kMXTyBo6dPcFVsC0a1u4PqfgEYY1i4eyWv//Glm2pzdnqFtAuNfuEr+ve/ikWLXube+65n1KhpDmVWr/6befNWM2v288ybP5qlSzexbNnmEvcBXNmuCXGzniNu1nN2DcPBg6mMGPEZb4wbxNx5o4mb9RwNG0a6vsLn4Lt1S7jqrSHsOZLo7lCc6r2eQ/l07TxaThzEu79/zfu9nnIo81SHWwFoO2UwV7x/Hy0iG9Hnoo7lHWqZvNvjcT5dO59Wk+7m3d+/YXKvJx3K3NO6B9X9qnHlB0NoO2UIh06mMLR9fwDubNWVZnXOp8PUh2g9+V4AHmzbt1zrYMVb1zzMtE2LaDftQSatm8k7/3nUocydzbpS3a8anb56jKunP0pyeioPX2Z7GnFq
RhoPLHyLjl8+wvUzhnFlTDNuvMAzXmtPU2EbhyNHjrNlyz569WoDQJcurTgQf4T4+MN25RbMX0PfvlcSGOiPn58v/fq1Y9681SXuO5v/ffV/9OrZhoYNowDw9/clODiwhKPca9nO9RxITXZ3GE5Vq3pNLolqzIy/FgMwa8sy6odGUq9mHbtyzSMb8uNO2+uanZvDkl1ruPWSa8s9XqtqBdrq93V+/bbm1S+kjkPZQN9q+Hr54C1eBPkGkHDc9v+geZ2G/LJ7XUGm8OPfqxjQomLWvVZACM0jzue7bb8CMHfncuoF16FuDcdnhQX4+BfUt7pvAIlptvpuSt7N3uMHAcjIyWJT8j/UD6mYX9y8vaTMS0VQpsZBRHxEJDB/cXZQAImJKUREhODj453/N4mKCiUxMcWuXELiUaKjwwrWY2LDSUxIKXEfwOpVf9On9yvcOmAcCxeuLdi+a1cipzOyuHvQu/Tp/Qpjxszg1KlMV1RTnUVscG2SThwhJze3YNv+Y4eoG2L/YbL2wHZuvPhqfL19CPILoGfTq6hXs2J+YBQWG5JXP/Nv/eKPHSK2SP0+WTOXExkn2fX0t+x66luCq1Vn6qpZAKxL2E73C9sR5BeAr5cP/S7u7NB4VhTRQbVIOnnUrr4HTiQTU6O2XbkvNi0kLTOdzYO/YNN9X1DDP5D/bpjncL7agTXp2agdi/escXnsVZGlxkFErhCRjdie5XCi0FJc+WEiEp+/jB//raXgpEifnTEllytaprh9nTu1YMkvrxE36zleefUOxr7+HevX2/rps7JzWLVqB+++N5jvvh9J2olTTJo4x1LsyjlMkRdUcPxW9c7vM4g/lsyvgyfx9a1jWLl/M1k5FbPPvajS1K/z+a0wBhq/1Z/Gb9/CsdNpjLh6IABfbfiJn3etYcGg8cy96022Je8hOyenXGIvC4f/w2fol+9Y9xKMgeYf30WL/w7ieMZJnmwzwK5MkF8A03o+z6R1M9mYXDnG1yoaq5nDBOA+YCNQE3gBeLq4wsaY8caY2Pxl2LCbS/2HoqJCSUpKITs7J/9cJCWlEBUValcuOiqMAweOFKwnHDhCVHRoiftCw4IICPADoGHDKDpe3Yx163bZjosOo9PVzQgJqY6Pjzfdurfmr417Sh27co7448lEB9fG2+vft2lsSG32HztkVy4jO4uRi6bQ/oMH6P75U6ScOsG25L3lHa5l8cfy6if/1i8mpDbxRep3T+sezNn2Gxk5WWTlZvPNxp/p0ODSgv1vLP2SDh8+SJdPn2DH4f0Vtu4JaYeJDgq3r29QLQ6csO8Ovav5Dczf/UdBfb/f/n+0j21esL+6bwAzer/Iot2rmPrnrHKL3ypvKftSEVhtHHyNMSsBH2PMCWPMq0Cvkg4qi/DwYJpeVJfZs1cCsGjROmJiwomNrWVXrkvXVsTFrSA9PYPMzCy+/3453btdXuK+gwf/7V46fPg4K1ds46KmdQHbrKaVK3eQmZkFwLJlW2hyYawrqqnO4vDJVP5K2lnQh977og7sSz3IvtSDduVq+AcS4OsPQP2akdzbuicT//iu3OO16nC6rX635NevaV79jtnXb09KIv9p1LpgvcsFbdmavAewzewJ8a8OQFhAME9cNYB3l39TPhWw6PCpY2xM3s1NTToB0KNRO/afOMT+E/aN4d5jSXSu16pg/boGl7PtiK3BC/StxozeL/LL3j95Z/XX5RZ7VSRF09qzFhZZaYxpIyK/AkOBeGC1Mea80hxv+KX0fwzYvTuJkSM/JzX1JEHVqzH2jUE0bhzNkMETefSxXjRvXh+AyZPm8cMPfwDQrXtrhg3rU3CO4vaNHx/Hkp834OPjTW6uYcCADtx2e6eC4z7+aBEzZ/6Bt48XjRtH89JLt1OjRkCpY/d6cKSVqp6zSQOeoneLjkQGh3E47RhpGek0Hl36TO1cBNUJctm5G4fH8kGf4YQF2qY+3v/DOLYl7+W721/l1V8+58+EHTSPbMgX
Nz9Pdm4O2bk5vP7rF8ze+pvLYira3XkuGoXH8kHvp/Pql84DcXn1uy2vfok7CK1Wg/d6DqVJ7foYY9iWvI+hc98l5fQJalevyYJBb5OTm4u3lxfvr/iBT9bOdVp8AaGlf8+XRsOaMUy47nFCq9XgRGY6j/30LtuP7md6rxcYt+IrNhzaSU3/IN76z8NcEFYXY2DH0f08vWQyqRlpDG19M0+1uZXtR/+d7jvn7995d421LuviHHxsttNe3P98f7ulz7vCfu433e35g9XG4QngC+Ay4Dts10m8YIx5qzTHW20cPFl5Nw7u5MrGoSJyZuNQ0Tm7cajotHH4l6WL4Iwx7+T9+qOIhAPVjDHFDkgrpVRVVaUughMRbxF5XEQmGWOygAgRucZFsSmllHITq7fPmAj4AlflrR8BZgCXOzMopZRS7mW1cWhnjLlURP4EMMakioifC+JSSimPVqW6lbBd/FZARLzLcA6llFIVnNXM4S8RuR0QEWkAjASWOjsopZTydN4e/rXZavjDgI5AFLAy7/jhzg5KKaWUe5U6c8jrQupijLkfuN91ISmllOerMmMOxpgcbJmDUkqpSs5qt9IaEbnSJZEopZSqMKwOSHcEHhaRHUAaIIAxxlzh9MiUUsqDVZSH9pSV1cZh6Bm2VZn7JSmlVFVhtXHYDIwGLgGqFdqumYNSShVSZQak83yC7TbdkcAY4BCwyNlBKaWUci+rjUM9Y8wbwGljzBzgRqCd88NSSinlTla7lTLzfmaISBiQCugj0pRSqghPv0LaauOwPa9R+BJYARwD/nR6VEoppdzK6sN+7sj79T0RWQOEAgucHpVSSnk4Tx+Qtpo5FDDG/O7MQJRSSlUcZW4clFJKFc/TL4Lz8CETpZRSrqCNg1JKKQfaOCillAt4i5R5KS0RaSwiy0Vkh4isEpGLiil3tYisFpHNIrKtNDdQ1TEHpZTyXFOBD40xn4nITcB/AbsPfhGJBj4HbjDGbBWRatjf/uiMNHNQSikX8PYq+1IaIhIBtMJ23RnA98B5eY9wLuwh4EtjzFYAY8xpY0xqSefXzMFFguoEuTuEcpN2MM3dIZQrbz9vd4dQbhqcF+ruEFTx6gIJxphssD07QUT2AfWAPYXKXQT8IyKLgVrAMuAZY0z62U6umYNSSrnAuYw5iMgwEYkvtBT3FM6ij0w404CFL9AJuBloDYQAL5YUv2YOSilVwRhjxgPjSyi2H4gVER9jTLaICLZsYl+RcnuBP40xKQAiMgMYXlIMmjkopZQHMsYcwnZvu4F5m/oBe4wxe4oU/QroLCL+eetdgQ0lnV8bB6WUcgFvKftiwf3A/XmPbh4B3AsgIvNFpDWAMWY5MAdYLyIbgdrACyWdWLuVlFLKQxljtlNk6mre9m5F1scB46ycWxsHpZRyAS8PvyurdisppZRyoJmDUkq5gMWxgwpHMwellFIOtHFQSinlQLuVlFLKBTz8WT+aOSillHKkmYNSSrmADkgrpZSqdDRzUEopF/Dy8EEHzRyUUko50MZBKaWUA+1WUkopF9ABaaWUUpWOZg5KKeUCHj4erZmDUkopR9o4KKWUcqDdSkop5QI6IK2UUqrS0cxBKaVcwNMfE+oxjcOePQcZMeJzUlLSCK4RwOtj76JRo2iHclPen8/MmcsB6NHjCh4f2qvEfenpGYwZM4NNm/aSlZnDddddyrAn+yAV9MVtGBbD1L7DCQ8MIfV0Gg/EjWN78j67MoG+1Xi72yO0jL4QP28f5mz7ndGLP3ZTxM71Xv9h9GrRgQbhUTQbcxubE3a7O6Qya1Q7lk9vf47w6iEcO5XGPdNfZevBPXZlAv2qMeGmYVxWtwl+3r7M2vh/PDvnAwAGXt6VoZ0GFJSNrVmbZbs2cPMnz5ZnNUqtXo1IXmn/MDWr1eBEZjrP/z6Z3ccO2JW5p1lvujZoX7AeGxTBzJ1LeGvNFwXb/Lx8+brHG5zOzuDW+SPLLf6qxGO6lUa/8BX9+1/F
okUvc+991zNq1DSHMqtX/828eauZNft55s0fzdKlm1i2bHOJ+6ZOXQDA7NnPM2fuC2zZup9FC9eVX+Useq/nUD5dO4+WEwfx7u9f836vpxzKPNXhVgDaThnMFe/fR4vIRvS5qGN5h+oS361bwlVvDWHPkUR3h3LOpvQfzkfLZ3PRq7fy5s/T+ehWxw+6kdfdCUDLN+7kkrEDuTTmAvpd2hmAL1cvpPWbgwqWxONH+Grtj+VaByteaDuE7/5eTK+4oXy2aRYvtXvQocwnm2bRf+5w+s8dzm3zR5KVm8283cvsyjzacgAbkneUV9hl4i1lXyoCj2gcjhw5zpYt++jVqw0AXbq04kD8EeLjD9uVWzB/DX37XklgoD9+fr7069eOefNWl7hv27YDdOxwMSKCr6837ds3ZdasleVbyVKqVb0ml0Q1ZsZfiwGYtWUZ9UMjqVezjl255pEN+XGnrX7ZuTks2bWGWy+5ttzjdYVlO9dzIDXZ3WGcs9pBNWkZewHT1ywCYOaGX2kQHkX9sEi7ci1iGrFwywrA9lr+tH0VA1t3dTjf5fWaUqdGGHM2LnPYVxGEVQumSfh5BR/0P+1bSUxQBNHVaxd7zDV1r+Bg+lG2Hv2nYFuriCbUD45i7u6lLo+5KrPUOIjIyyJSU2zmichhEennquDyJSamEBERgo+Pd34cREWFkpiYYlcuIfEo0dFhBesxseEkJqSUuK958/osWLiWzMxs0tJOs/in9Rw4cMTV1SqT2ODaJJ04Qk5ubsG2/ccOUTckwq7c2gPbufHiq/H19iHIL4CeTa+iXs3IoqdTblS3Zh0Sjh8mJzenYNv+lIPUC7Vv6Nfs28rNLa+xvZb+gfRp0ZH64Y6v5d1tezB9zUKyC52vIqkTGE5yego55t/3buLJw0RVr1XsMX0bd+aHnUsK1gN8/Bl++SBeWfGRS2NV1jOH3saYVOBaIBtoD4wqrrCIDBOR+Pxl/Phvyxxo0f5/Y0ouV7RMcfsGD+5CVGQo/W8ey0MPvk/Llufj4+td5lhdzRSpmOCYh77z+wzijyXz6+BJfH3rGFbu30xWTnZ5hahKyeF9fIZxrnGLpxOfeog/hn1M3OA3+OOfTQ6vZYCvP/1b/odP/pjrwmjPnZ9FDikAACAASURBVKHoe7d4dQLDaVW7iV2X0rDLBjJj+yIOnUo5y5EVg5eUfakIrA5I5zf5VwPfGmO2n23Q1hgzHhhfsM4vxXykn11UVChJSSlkZ+fg4+ONMYakpBSiokLtykVHhdl94084cISo6NAS9/n7+zLy2f4F+z78cCGNGkaVJVSXiz+eTHRwbby9vAqyh9iQ2uw/dsiuXEZ2FiMXTSlYH3bVALYl7y3XWNXZ7U89SGzN2nh7eRdkD3VrRrAv5aBduYzsTJ78YULB+vBrB7I1aY9dmX6Xdmbbwb0Og9kVycH0I9QJDMdbvAqyh8jqtUg8efiM5fs06sSv8Ws5nnmyYFvLiCZcFdOS+1v0w9/bj2C/6szs9TY3zn6yXOpQlVjNHE6KyAhgAPCTiHgBfs4Py154eDBNL6rL7Nm2cYBFi9YRExNObKx9Otqlayvi4laQnp5BZmYW33+/nO7dLi9xX1raKU6dygQgfv9hZvxvKXffUzH75w+fTOWvpJ0MaGGLr/dFHdiXepB9qfYfKDX8Awnw9Qegfs1I7m3dk4l/fFfu8ariJaelsj5+B7e37gLAjZd0Yu/RJPYeTbIrV/i1bBAWxf3t+/LOLzPsytzdpjufrKjYWcPR08fZdvQfup/fAYDr6rUhIe0QCSfPPH7Uu2EnZhbqUgK4ac7T3DDzEW6Y+QjDl77L3yn7KmzD4C1S5qUisJo5DAIeAYYbYw6KSCNgutOjOoOXXrqdkSM/Z+rUhQRVr8bYNwYBMGTwRB59rBfNm9enTZsL6dr1Mnr1HANAt+6t6dDxYoCz7tu//zBDh36Ej7cX3t7ejBh5M02b1i2PapXJ43Pe4YM+w3mqw20c
zzjJ/T+MA+C721/l1V8+58+EHTQIjeKLm58nOzeH7NwcRi6awsakXW6O3DkmDXiK3i06EhkcxuLHJpKWkU7j0Te7O6wyefCbN/nktlGMuO4Ojp9O557prwAw5/63eHH+x6zdv43za8Xwv0EvF7yWT/4wgQ0H/i44x/nhMbSq24TeHz3jrmqU2pgVHzKm/cPc17wvJzNP8dzvkwGYfM0IJm/4hi1HbNOS20Q2QxBWJm50Z7hVmhTtv3alsnYreaLgF191dwjlJu1gmrtDKFfefhV3PMrZLrosxt0hlKu/7vzGaV/bx619oMyfd8Mv+8Dt6YOlzEFEmmIbgD6/8LHGmCucHJdSSik3stqt9A3wBfAJUDHnyymllDpnVhuHLGPMmy6JRCmlKpGKcqVzWVmdrbRQRBwvzVRKKVWpWM0cfgZmiUgOkIHtGhZjjIk4+2FKKVW1eHnEzYmKZ7VxmIptOus6dMxBKaUqLauNwxFjjF5JpZRSJagoF7OVldXE5wcReUBEwkQkMH9xSWRKKaXcxmrm8Frez/cLbTNA1bkqSCmlqgBLjYMxxsOHWJRSqnxUlLurlpXlx4SKSAxwFbaM4TdjTILTo1JKKeVWVh/20xvYANwK3AasF5GerghMKaU8mac/JtRq5jAaaGuM2QkgIg2Bb4E5zg5MKaWU+1gdQ/DObxgAjDG7ynAOpZRSFZzVD/ZDInKv5D3+TUTuAs78GCellKrCPP0xoVYbhweAwcApETmVtz7E6VEppZRyK6tTWXcBbUUkCNuDgk64JiyllPJsnn6FdFmmsvYDrgWMiPxkjPnB+WEppZRyJ6tPgnsB6IPtgT8Ao0TkYmPMK06PTCmlPFhFGTsoK6uZw03YprKmA4jIR8AfgDYOSilViVgdkJb8hgHAGHMS2zMdlFJKVSJWM4dVIvIF8AG222cMBlY7PSqllPJwFeVK57Kymjk8BiQAE4BJwCHgUWcHpZRSyr1KnTmIiDdwnzFmhAvjUUqpSsGrHKayikhj4HOgFpAKDDLGbCmmbG1gE7DMGHNTSecudeZgjMkB+pW2vFJKKZebCnxojLkAGAf89yxl3wfml/bEVsccfhKRW4wxX1s8DgBjcstymEcSD78Axgpvv6r1rKeczKrz+HRvT5+P6UauHnMQkQigFXB93qbvgUki0sAYs6dI2duBg8AaoEdpzl+WMYf/ichJETkkIskicsjiOZRSSp27ukCCMSYbwBhjgH1AvcKFRCQaGAZYGhKwmjm0tlheKaWURSIyDNsHer7xxpjxZyhqih56hjIfAcONMWlWejSs3ltpr5XySilVVZ3LgHReQ3CmxqCw/UCsiPgYY7Lz7pZdF1v2UNiVwH/zGoYgIEBEFhljupzt5KVqHEQkGccWqoAxJqI051FKKeUcxphDIvInMBD4DNuEoT1FxxuMMWH5v4vIIKBHaWYrlTZzyO9Oug8IAz7Elr7cAxwo5TmUUqrKKI+prMD9wGci8ixwHLgLQETmAy8YY9aU9cSlahzyu5NEpKMx5upCux4TkaXAG2UNQCmlVNkYY7Zj6zYqur1bMeU/w5ZllMjqgHS0iNQyxhwGEJFaQJTFcyilVKVXTpmDy1htHN4FNojI3Lz1bsBrzg1JKaWUu1mdrTRZRJYBV2Mbc5hkjNnoksiUUkq5jeUnwQFJwHpjzDIR8RERP2NMprMDU0opT+YlVq8xrlgsRS8iNwKr+PdJcBcDcc4OSimllHtZzRyeBS4DFgMYYzaISH2nR6WUUh7O0wekreY9ucaYI0W2aZeSUkpVMlYzhxMiUoe8q6VFpDOQ4vSolFLKw3l65mC1cXgG2/3AzxORX4HGQE9nB6WUUsq9rE5lXSMi1wDtsE1lXW6MSXVJZEoppdymLFNZg4AQbF1LgdgeTaeUUqoQT+9WsjqVdQCwHugPDADWi0h/VwSmlFLKfaxmDi8CVxhj/gEQkQbAQuAbp0allFIezsvyZNCKxWr0h/MbBoC8+4Yf
dmpESiml3K5UjYOIBIpIIPCTiDwnIpEiEiUio9ArpJVSqtIpbbdSGrYB6PwRlpcL7TPAW84MSimlPJ2nD0iX9mE/nt15ppRSypKyTGVVSilVAk/PHDQjUEop5cBjMoc9ew4xcsTnpKScJDg4gNdev5NGjRyfUDplygJ+mPkHAN17XM7jj9vu7rFx4x5ee/Vbtm2Lp2PHZrw3YXDBMfPnr+Hjj34kKzsHQejfvz0D7+hcPhUrhYZhMXzQ52nCA0M4djqNB+LeZPvhfXZl/L19ebfHUC6NaowI7ElJ4qFZb3H01HEEYcx1g7m2UWt8vLxZsX8zT8ydQFZutptqdHaNasfy6e3PEV49hGOn0rhn+qtsPbjHrkygXzUm3DSMy+o2wc/bl1kb/49n53wAwMDLuzK004CCsrE1a7Ns1wZu/uTZ8qyG07zXfxi9WnSgQXgUzcbcxuaE3e4Oqczq1Yjk5XYPUdO/Bicy0xn9x/vsPnbArszdF/emS/12BesxQRHE7VrC22unFWzz8/Llf93Gcjong9sXVMzXtUo9z8GdXhz9Ff37X8XCRS9y773X8dyoLx3KrF79N/PmrSFu1ijmznueZUs389uyLQDUrh3CyGdvZsSImxyOi4wMZeqHDzNnzvNM/+pJpk37lXXrdrm8TqX1bo/H+XTtfFpNupt3f/+Gyb2edChzT+seVPerxpUfDKHtlCEcOpnC0Pa26xPvbNWVZnXOp8PUh2g9+V4AHmzbt1zrYMWU/sP5aPlsLnr1Vt78eTof3TrSoczI6+4EoOUbd3LJ2IFcGnMB/S61Nehfrl5I6zcHFSyJx4/w1dofy7UOzvTduiVc9dYQ9hxJdHco52xUm8F8//fP9Jn9BJ9vmc3otg84lPl08ywGzH+GAfOfYeDCZ8nKzWb+P7/ZlXnk0gH8dXhHeYVdJXlE43DkyAm2bNlPz15XAHB9l5YcOHCEA/H2dw9fsGAtffu2JTDQHz8/X27sdyXz5q0BbA1AixYN8PNzTJZatWpI7dohANSoEcB559UhPr5iXL5RK7Aml0Q15uu/FgMwa+sy6odGUi+kjkPZQN9q+Hr54C1eBPkGkHDcVofmdRryy+51BZnCj3+vYkCLa8uvEhbUDqpJy9gLmL5mEQAzN/xKg/Ao6odF2pVrEdOIhVtWAJCdm8NP21cxsHVXh/NdXq8pdWqEMWfjMtcH7yLLdq7nQGqyu8M4Z6H+wTQNO4/5/9hei8X7VhIdFEFU9drFHtM59nIOpR9l69GCy6toWbsJ9YIjmfuP576mnsDq7TP8Cv1+voj0EBFv54dlLykxhYiIEHx8vPP/NlFRoSQkHrUrl5hwlOjosIL1mJhwEouUKcnOnYmsX/8PbdtceO6BO0FsSG2SThwhx+QWbIs/dojYkAi7cp+smcuJjJPsevpbdj31LcHVqjN11SwA1iVsp/uF7QjyC8DXy4d+F3emXk3HxqUiqFuzDgnHD5OTm1OwbX/KQeqF2se7Zt9Wbm55Db7ePgT5B9KnRUfqh0cWPR13t+3B9DULyS50PuUekdXDSU5PsXsvJ508TFT1WsUe06fRNcTt+qVgvZq3P0+1votXV37s0lidwUukzEtFYDVz+F1EaohIOLAMGAlMdn5YZ1DkH8wYU0wxKbFMcZKSUnj44Q948aVbiahT03qMLlK0HoLjm6fz+a0wBhq/1Z/Gb9/CsdNpjLh6IABfbfiJn3etYcGg8cy96022Je8hO6fiflg6vGxn+M8ybvF04lMP8cewj4kb/AZ//LOJrBz7MZQAX3/6t/wPn/wx14XRKisMRd7LZ/kgrBMYTsuICwsyDYAnWg3kmx2LSD6lj5FxNauNg48x5gTQHfjcGNMe2+27z0hEholIfP7yzvjvyhRkZFQoB5NSyM62faAZY0hKSiU6KsyuXFR0GAcO/NvVlJBwlKgiZYpz6GAq99w9gQcfuIGuXVuVKU5XiD+WTHRwbbwLDW7FhNQm/tghu3L3tO7B
nG2/kZGTRVZuNt9s/JkODS4t2P/G0i/p8OGDdPn0CXYc3s+25L3lVgcr9qceJLZmbby9/k1I69aMYF/KQbtyGdmZPPnDBFq/OYhrJz3K0fTjbE3aY1em36Wd2XZwr8NgtnKPpJNHiAgMt3sv1wkMJ/HkmbtwezfsxP/Fr+V45smCbZdGXMiQ5v2Y12ciY696nEY16/Fdj4p5DW5Vyxz88352Apbk/Z575qJgjBlvjInNX54Y5jgYXBrh4TVo2rQuc2avAuDHRX8SHRNGTGy4XbmuXVoRF7eC9PQMMjOzmPn9H3TrflmJ5z906Bh33/0e9913PX36ti1TjK5yOD2Vv5J2ckveGEHvph3Yl3qQfcfsPyz3pCTyn0atC9a7XNCWrcl7ANtMphD/6gCEBQTzxFUDeHd5xbxXYnJaKuvjd3B76y4A3HhJJ/YeTWLv0SS7cjX8Awnwtb0dG4RFcX/7vrzzywy7Mne36c4nKzRrqChSMo6zPeUfup3XAYBr67UhIS2ZxJNnHk/pef7VxO38xW7bLfOG0z3uUbrHPcqI395jZ+o+bpr7lMtjr4qsTmVdIiJb8o67X0RCgXKZD/nSS7cxcuQXTJ26iKCgarw+1jZbZciQyTz2aA+aNa/PFW0uoGvXy+jd6xUAunVrTYcOFwOwb18yd97xDqdPZ5KRkUWnq59lyP1duO22q5k4cS6JiSlMm/YL06bZ3ox33NGZG/tdWR5VK9Hjc9/lg95P81SHWzmekc4DceMA+O62V3n1l8/5M3EHr/86jfd6DmXVQx9jjGFb8j6Gzn0XgOBq1Vkw6G1ycnPx9vLi/RU/sHDHCndW6awe/OZNPrltFCOuu4Pjp9O5Z7rt9Zxz/1u8OP9j1u7fxvm1YvjfoJfJzs0hOzeHJ3+YwIYDfxec4/zwGFrVbULvj55xVzWcZtKAp+jdoiORwWEsfmwiaRnpNB59s7vDKpNXVn7Ey1c+xL3N+nAy6xTPL38fgImdRzBlwzdsOWqbpntFZDNEhJVJG90Z7jmpKBlAWYmVfnmxdRBeAuw2xhwXkVpAXWPMn6U5Ptf8bG0QwIPVfHmsu0MoN+lH090dQrnKyay44zXOdmn7+u4OoVz9OfBrp32iL014rsyfdx2jX3F7y2L1MaFGRHYAsSISm7c5w/lhKaWUcidLjYOIPIHtjqwpQP7XJwOc7+S4lFLKo3n6FdJWxxweBS40xiS4IhillFIVg9XGYb82DEopVTKvM1yP5EmsNg6jReRjYD5wOn+jMWa+U6NSSinlVlYbhz5AT+AC7McctHFQSqlCPH0qq9XGoTfQwBhzyhXBKKWUqhisDqfvArJcEYhSSqmKw2rm8De2q6TjsB9zeN+pUSmllIeralNZq2HLHpoX2lZlrnpWSqmqwuoV0ne7KhCllKpMqtSAtIj4AI8D12LLGH4CJhpjKubDiJVSSpWJ1W6l8UBDYGre+r3AecBjzgxKKaU8XZXKHLA9x+FSY2zP+RORucA6ZwellFLKvawOp0uRYyRvUUopVYlYzRwWAYtE5L/YxhwGAQucHZRSSnm6qjaVdThwP3AjtozhB+BDZwellFLKvaxOZc0FpuQtSimliuHpA9KW8h4RiRaRuSJyMm+ZLSJRrgpOKaWUe1jtFJsKLAdi8pblaLeSUkpVOlbHHOoaY3oWWh8rIuudGZBSSlUGnv6wH6uZg5eIROaviEgEOpVVKaUqHauZw5vAnyIyB9tU1m7ASKdHpZRSHs7TB6StzlaaJiLrgM7YMob3jDFbXBKZUkoptyl14yAi3sBCY8x1wGbXhaSUUp6vPC6CE5HGwOdALSAVGFT0C7uI3AKMAHyx9fh8aIyZWNK5S904GGNyxMbbGJNT8hGOxMOvGLQiIDTA3SGUmwbnhbo7hHLl7eXZ3QVWrP99r7tDKF8D3R2AZVOxfdh/JiI3Af8FrixSJh64wRiTJCIhwFoRWWeM+f1sJ7Y65rACiBORaUBa/kZjzHyL51FK
KXUO8iYEtQKuz9v0PTBJRBoYY/bklyvcCBhjjonINmx303Zq49A+7+eDhbYZQBsHpZQq5FwGpEVkGDCs0KbxxpjxRYrVBRLyn6djjDEisg+oB+wp5rwXYcsshpQUg9UB6c5WyiullLIuryEo2hicsWiR9WJbJBGJBWYBDxhjEko6sdXMgbzbZZxX+FhjzFKr51FKqcqsHMZY9wOxIuJjjMkWEcGWTexzjEWigcXAK8aYb0tzcquPCR0FPA3sBvIHpQ1whZXzKKWUOjfGmEMi8ie2YfTPgH7AnsLjDVDwhf5n4A1jzOelPb/VzOEeoJEx5rDF45RSqkrxsnwDijK5H/hMRJ4FjgN3AYjIfOAFY8wa4GVs4xCPi8jjece9Z4z59Gwntto4JGnDoJRSFYMxZjuOU1cxxnQr9PtgYLDVc5eqccgb4QbbU+DeBqYDpwv9cb1KWimlKpHSZg7ziqzfWOh3A5zvnHCUUqpy8PSLfkvVOBhjznN1IEoppSoOq0+CiyvNNqWUquq8xKvMS0VgNYp6Z9jW0BmBKKWUqjhKOyA9GNvl1heIyKpCu0KA7a4ITCmlPJmUz1RWlyntgPSPwN/AFGwXweU7Dvzl7KCUUkq5V2kHpPcCe4Gmrg1HKaVURWD19hm1gNHAJUC1/O3GGL19hlJKFVJRBpbLymr0n2B7cEQkMAY4BCxydlBKKaXcy/JsJWPMG8BpY8wcbBfDtXN+WEop5dkErzIvFYHVKDLzfmaISBiQDcQ6NySllFLuZvXGe9vzGoUvsT0y9Bjwp9OjUkop5VZWnwR3R96v74nIGiAUWOD0qJRSysN5+oB0WZ4EFwN0wHbDvd+MMTklHKKUUsrDWL230gBgPXALMAD4U0T6uyIwpZTyZCJeZV4qAquZw4vAFcaYfwBEpAGwEPjGqVEppZRyK6uNw+H8hgHAGLNHRPTJcEopVUQ5PSbUZUoVvYgEikgg8JOIPCcikSISJSKjAL1lt1JKVTKlzRzSsA1AS976y4X2GeAtZwaVb8+eg4wY8TkpKWkE1wjg9bF30ahRtEO5Ke/PZ+bM5QD06HEFjw/tVeK+mTOX8/pr3xITEw5AcHAgX0wbBkBGRhajR09n8+Z9YCA2thavvXYnoWFBrqhmic4LiWLi9UMJqxbM8YyTPLb4PXYc3W9Xxt/bl3GdH+KSiIaICHuPJTF08QSOnj7BVbEtGNXuDqr7BWCMYeHulbz+x5duqUtp1KsRySvtH6ZmtRqcyEzn+d8ns/vYAbsy9zTrTdcG7QvWY4MimLlzCW+t+aJgm5+XL1/3eIPT2RncOn9kucVvRb0akbzc7iFq+tvqOvqP9x3qevfFvelS/99rTWOCIojbtYS3104r2Obn5cv/uo3ldE4Gty94ttzid7b3+g+jV4sONAiPotmY29icsNvdIVVZpcocjDFexhjvvJ9FF29XBTf6ha/o3/8qFi16mXvvu55Ro6Y5lFm9+m/mzVvNrNnPM2/+aJYu3cSyZZtL3AdwZbsmxM16jrhZzxU0DABfz1hKenoGs2c/z5y5L1CrVg0+/th9dwl565qHmbZpEe2mPcikdTN55z+POpS5s1lXqvtVo9NXj3H19EdJTk/l4cv6AZCakcYDC9+i45ePcP2MYVwZ04wbL+hY3tUotRfaDuG7vxfTK24on22axUvtHnQo88mmWfSfO5z+c4dz2/yRZOVmM2/3Mrsyj7YcwIbkHeUVdpmMajOY7//+mT6zn+DzLbMZ3fYBhzKfbp7FgPnPMGD+Mwxc+CxZudnM/+c3uzKPXDqAvw5X7LqWxnfrlnDVW0PYcyTR3aGcM08fkK4YUZzBkSPH2bJlH716tQGgS5dWHIg/Qny8/RDHgvlr6Nv3SgID/fHz86Vfv3bMm7e6xH0lOX0qk6ysHLKzcziZnkGdyFDnVrCUagWE0DzifL7b9isAc3cup15wHerWiHAoG+Djj6+XD97iRXXfABLTbP9Wm5J3s/f4QQAycrLY
lPwP9UMiy60OVoRVC6ZJ+HkFH/Q/7VtJTFAE0dVrF3vMNXWv4GD6UbYeLRgOo1VEE+oHRzF391KXx1xWof7BNA07j/n/2Oq6eN9KooMiiDpLXTvHXs6hInVtWbsJ9YIjmfvPsmKP8xTLdq7nQGqyu8NQlKFxEBHvvFlKLpWYmEJERAg+Pt75f5eoqFASE1PsyiUkHiU6OqxgPSY2nMSElBL3Aaxe9Td9er/CrQPGsXDh2oLttwzoSFBQAO3bPU379sNJO3GKgQM7uaKaJYoOqkXSyaPkmNyCbQdOJBNTw/4D5ItNC0nLTGfz4C/YdN8X1PAP5L8b5jmcr3ZgTXo2asfiPWtcHntZ1AkMJzk9xa6+iScPE1W9VrHH9G3cmR92LilYD/DxZ/jlg3hlxUcujfVcRVZ3rGtSCXXt0+ga4nb9UrBezdufp1rfxasrP3ZprMq6KvWYUBHpgO25Dkvz1i8XEce+nn/LDxOR+Pxl/PhvLQUnInbrxpRcrmiZ4vZ17tSCJb+8Rtys53jl1TsY+/p3rF9v699cvnwrCPz2+ziWLXuDGsGBTJ7s+EFbXhzqXeTfBaBj3UswBpp/fBct/juI4xknebLNALsyQX4BTOv5PJPWzWRjcsXtyzXYV9ixtv+qExhOq9pN7LqUhl02kBnbF3HoVMpZjqwYHOp6htc2X53AcFpGXFiQaQA80Wog3+xYRLIH1FV5FqtN1DjgauAIgDFmNdCquMLGmPHGmNj8Zdiwm0v9h6KiQklKSiE7Oyf/XCQlpRAVZd+9Ex0VxoEDRwrWEw4cISo6tMR9oWFBBAT4AdCwYRQdr27GunW7AJgxYxnXXXsp/v6++Pn50LPnFaxc6Z7+3IS0w0QHheNd6NtETFAtDpywT73van4D83f/QUZOFlm52Xy//f9oH9u8YH913wBm9H6RRbtXMfXPWeUWv1UH049QJ9C+vpHVa5F48swzpvs06sSv8Ws5nnmyYFvLiCbc36IfC26cxLiOQ2kcWo+Zvd52eexWJZ08QkSRutYJDC+2rr0bduL/itT10ogLGdK8H/P6TGTsVY/TqGY9vuvhkvkhyiLBu8xLRWC1cfAxxuwqsi3zjCXPUXh4ME0vqsvs2SsBWLRoHTEx4cTG2qfcXbq2Ii5uBenpGWRmZvH998vp3u3yEvcdPPjvN63Dh4+zcsU2LmpaF4C6dWvx229bMMZgjOHXXzZyQWPHWVLl4fCpY2xM3s1NTToB0KNRO/afOMT+E4fsyu09lkTnev+209c1uJxtR/YCEOhbjRm9X+SXvX/yzuqvyy32sjh6+jjbjv5D9/M7AHBdvTYkpB0i4eSZ+6F7N+zEzEJdSgA3zXmaG2Y+wg0zH2H40nf5O2UfN85+0uWxW5WScZztKf/Q7TxbXa+t14aEtGQSi6lrz/OvJm7nL3bbbpk3nO5xj9I97lFG/PYeO1P3cdPcp1weu6r8rF4Ed1pEgrBNX0VELgZOOz2qPC+9dDsjR37O1KkLCapejbFvDAJgyOCJPPpYL5o3r0+bNhfStetl9Oo5BoBu3VvToePFAGfdN336/7Hk5w34+HiTm2u4667/0PbKJgA88kgPXnjhS3p0fwkRoWHDKF56+XZXVbNETy95nwnXPc7jrW/mRGY6j/30rq0OvV5g3Iqv2HBoJ2+u/B9v/edhlg6chDGw4+h+nl4yGYAhl/SkZZ3GBPr6061hWwDm/P07766x1s1XXsas+JAx7R/mvuZ9OZl5iud+t9Vj8jUjmLzhG7YcsXWJtYlshiCsTNzoznDPySsrP+LlKx/i3mZ9OJl1iueXvw/AxM4jmLLhG7YctdX1ishmiAgrkzy3rqUxacBT9G7RkcjgMBY/NpG0jHQajy59j4NyHjHFdeSfqbDI9dgeE9oQ220zugIDjTGLS3O84ZfS/zEPFznhHXeHUG7q1KxWcqFKxNvrbKMglcv63/e6O4RyZaascNqLm5LxdZk/70L9b3H7m8zqLbt/FJG/
sTUKArxijNnpksiUUkq5jaXGQUTqAYnGmCl56wEiUtcYs7+EQ5VSqkqpKI/7LCur0X9Xym1KKaU8DTVrIQAACcFJREFUmNUBaT9jTMEAtDHmlIj4OzkmpZTyeBXlYrayshq9EZGC+zaISB3Ofo2SUkopD2Q1c5gA/CYi+be+vBN4xbkhKaWUcjers5U+FZF/gG55m+41xnj+3b6UUsrJKsrdVcvKauaAMeZX4FenR6KUUqrCKFXjICJvGGOeEZFvAYcLO4wx/Z0emVJKeTBPf0xoaTOH/CeLzHVVIEoppSqOUjUOxpg5IuINXGSMecbFMSmllMfz9DGHUkdvjMkBrnBhLEoppSoIq03bHBF5RkQiRCQwf3FJZEoppdzG6myl/2/v3IOtquo4/vnysnhYOGIYMnMHZTKiohkkR1Gxl2VmOsaEiApJ9DBSCUtFwSnF0qZGcxxTh5jSphqd0ZugvJLggkAI915QsAiv+Ryih69JHfXXH+t3YHHPvfdw8Nx7z+X8PjN7ztr7t/bav99aa6/feux9duErIjdkxwyq5OsUQRAEVUJPf0O63Pccera1QRAEwX5R9nsOkoYBJ5FGDKvN7PmKaxUEQdDDqal/ZZU0CWgEvgpMAholxTsOQRAEBxnljhyuBcaZ2VMAkupIX4T7Q0W1CoIgCLqVcp3D7oJjADCzFkm7K6xTEARBj6enL0iXq/0ySVdLGirpSElzgPvjkdYgCIKDi3JHDvP894etjt9IPNIaBEGwh56+IB2PsgZBEARFlP0oaxAEQVCaWltzCIIgCGqAcA5BEARBETIr+nbPQYWkWWb2s+7Wo6uoJXtryVYIe4OupRacw7NmdlR369FV1JK9tWQrhL1B1xLTSkEQBEER4RyCIAiCImrBOdTanGUt2VtLtkLYG3QhB/2aQxAEQVA+tTByCIIgCMoknEMQBEFQRDiHIAiCoIhwDkHFkWSSBr7LNOqq+VshB2Kj2zSj1bEWSaMrq13PRtKZkm7qbj1qnapwDpVoTDoLSYslHd2ObKWkM7pap7aQVDN/otiDba0DZpSK1BY92OaykNTHzOrN7PLu1qXWqQrnUM2Y2elm9vfu1EHS3ZI2SmqW9KCkIyRNkNQo6RZJjwJnSxok6U5JGzzu7ZL6ehqzJP1F0maXf7KT1Z4taY2kv0o6N7PlOEl/cns2STonk10saYek1cD07HidpN2S5rpspqSBkhZI2urbvCz+MZKWex40Sjork5mkKz0Pdkr6jKQbPF8el/QRjzfS9W+StEXSdWXYWFReLrodGOU61WfpnCNpraSnJF2dpbNS0vWSVgBL/Nj5rk+zpEWShvnx3pJ+muXHLyT1c9lCrwsrJD0t6WZJp0pa5SOXWR6vl6RbJW13ux+T9J4yyrxdPN+vbSe/TNL3JK0EbpA0VdK9mXya51mT52udHz9NUoPruV7SyZXQNXDMrNs30oeCfgCsB54CpmWyscCjQDOwATjRj9eRPltaiDcwmWMA7wV+DzwBNAFLs3jn+3U2AX8GRpfQraUQBxiVnXsPsA44owvy5/AsfAVwKzABeAcYn8nuAM73sIC7gMt8f0gW73hgayeX5zwPjwB2A8OB93veHVmwC3gaGAp8DHge+IDLbiuUr5e1AZOza/wEuJvUwRkAbAYmumw9MMPDI4F/AcMz3S728ETgNeCLvv994Lcevhm4KrveYftjY3vl5eEJwMY26tfPC2UEvAQM8/2VwCKgr++PBl7M5HOARR7+FvAIcAjpr/gXA5e7bCHQ4LL+wC5ggefdMOBV0v3zCWAb0MvPe18h3Fl1IpPleT0VuDfLsx1Znenv2whgLXCoHz8GeK6QV7FVoMy6W4Gsclzi4Q8Dr3gF7wf8AzjNZeOBF7wxqKN953A2+zqEw/z3RL/ZDvH9k4CmErq1sNc5PAZc6OHjgbfpGudwCbAR2OI3SoPfNNtbxdtFcqKNvj0J3Oayz5Gc4VaXvQP068TyHJbt3w9MBk4H
/pvp1+jlewrwXeDO7Jwx7Osc/oe/l5OVRe4YLwN+CQwC3gB6Z7IHgHMz3Q738NHAK1m8TwPrPHwOsBO43vOu1/7Y2F55+fEJtO0cxmb7mwt2kZzDpEw2E7gr2x9cyBfgPmBKJjsbWOLhhcDsTLYa+Eq2/wxwLMkZ7PD4F5I5uc6qE5lsaCabyl7ncBMwt430vk2q73ldeg4Y0dn3Y61s1TSPeQ+AmW2T9BapNzkYeNPMlrisQdIuUi/zhQ7SagKOlXQbqUFc7Me/DHwcWC+pEHeIpH5m9mZHykk6lNRz+43rsk7SlvLNLA9J44HvACeY2T8lnQnMdfGrraMDZ5nZzlZp9CM1HhPM7DG35SWS8+3Q7gpirl+zmRUN/yWNKXH+a+atQuEUT7Ota9COrMDr/vs2yZGQ7fcBMLP7JK0FPkvK/0tJzq0jrER5tcfrWXiPDk5exq1t3p/8aO8aRdc0s5d8Wu0U4FTSFM/JZrajhP4HSq5f67pcCgEPm9kFFdQnyKimNYe2bpC2Kjx+7C32/Wb1nrlRbxxHAQ+TRgtbJQ329BaY2Zhs+2Apx9Dqul3NYOBl4N/eyH+jg7j1wBXyxUtJgyUdQ8qbvqQeIqQeaGfzNdehjjTiayBNA4yU9KlCJElj3K5HgNOz+fmLSqS/DPi6EgOAKcByM3uZ1Iu80NM/mlQH1pSjvKSRwC4z+zVpuun4/bSxo/J6mdQ7P1BWkPJoqO9/E1jhTnMZMFVSPy//i4Dl5SQuaQgwwMyWAleRRjWj3oW+rWkrv0rxR+CCgs2S+kvqDywFPq/sSS9J4yqoa81TTc6hLbYDhxQaE0knAEeQhusvAn0kfcjj7ulBSDqKNMVUD8wmOYXh7K1owz1eL0lj90cRb3S2Auf5ueOAj75rC0vzEGmov520KNnYQdxLSU6zUVIzqXGoc93nAhskrWLf3nJn8YakNaSbeKaZPWNm/wG+BFzji4tPAD8mTdk0A/OBtZIaSOsPHfEjkrPeQlpjqDezwiLmecAUSU2kEdN0M3um7WTaZSLQLGkz8DtSQ1zSRjour2bgSV8wri9KrQRm9jhwJbDUy/ck9jqfO0gj5k1+zRbgljIvMRxY5mlvIdX3h8rVswPayq8OMbNVwHUkm5tIMwFDzOxvpA7BXV6XtpGm84IKURX/rSTJgEFm9qrv7ybNw7ZIOo5UyQeQRhezzKzB400DrgGeJVXi+WYmSV8gNToiOcAHzGyOnzOZ5DB6k3rTi6yDx+YktZDWFbZKGgX8ys/bROpVzTezByuaIUFwkNH6Hg+qn6pwDkEQHNyEc+h5hHMIgiAIiqimp5W6DUnTSU+YtGamma3uan2CIAi6mxg5BEEQBEVU+9NKQRAEQTcQziEIgiAoIpxDEARBUEQ4hyAIgqCIcA5BEARBEf8HsG568OufV78AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 480x400 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 通过热力图可以看出 area，bedrooms，bathrooms 等变量与房屋价格 price 的关系都还比较强\n",
    " ## 所以值得放入模型，但分类变量 style 与 neighborhood 两者与 price 的关系未知\n",
    "heatmap(data=df, figsize=(6,5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 刚才的探索我们发现，style 与 neighborhood 的类别都是三类，\n",
    " ## 如果只是两类的话我们可以进行卡方检验，所以这里我们使用方差分析\n",
    "    \n",
    "## 利用回归模型中的方差分析\n",
    "## 只有 statsmodels 有方差分析库\n",
    "## 从线性回归结果中提取方差分析结果\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.formula.api import ols # ols 为建立线性回归模型的统计学库\n",
    "from statsmodels.stats.anova import anova_lm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "插播一条样本量和置信水平 α_level 的注意点（置信水平 α 的选择经验）\n",
    "\n",
    "    样本量            α-level\n",
    "    ≤ 100              10%\n",
    "    100 ＜ n ≤ 500      5%\n",
    "    500 ＜ n ≤ 1000     1%\n",
    "    n ＞ 2000          千分之一\n",
    "    \n",
    "样本量过大，α-level 就没什么意义了。\n",
    "\n",
    "数据量很大时，p 值就没用了，样本量通常不超过 5000，\n",
    "\n",
    "为了证明两变量间的关系是稳定的，样本量要控制好。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>df</th>\n",
       "      <th>sum_sq</th>\n",
       "      <th>mean_sq</th>\n",
       "      <th>F</th>\n",
       "      <th>PR(&gt;F)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C(neighborhood)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>3.783209e+13</td>\n",
       "      <td>1.891605e+13</td>\n",
       "      <td>153.563336</td>\n",
       "      <td>1.685424e-54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C(style)</th>\n",
       "      <td>2.0</td>\n",
       "      <td>5.861148e+13</td>\n",
       "      <td>2.930574e+13</td>\n",
       "      <td>237.908463</td>\n",
       "      <td>1.198166e-76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Residual</th>\n",
       "      <td>595.0</td>\n",
       "      <td>7.329254e+13</td>\n",
       "      <td>1.231807e+11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    df        sum_sq       mean_sq           F        PR(>F)\n",
       "C(neighborhood)    2.0  3.783209e+13  1.891605e+13  153.563336  1.685424e-54\n",
       "C(style)           2.0  5.861148e+13  2.930574e+13  237.908463  1.198166e-76\n",
       "Residual         595.0  7.329254e+13  1.231807e+11         NaN           NaN"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 数据集样本数量：6028，这里随机选择 600 条，如果希望分层抽样，可参考文章：\n",
    "df = df.copy().sample(600)\n",
    "\n",
    "# C 表示告诉 Python 这是分类变量，否则 Python 会当成连续变量使用\n",
    "## 这里直接使用方差分析对所有分类变量进行检验\n",
    "## 下面几行代码便是使用统计学库进行方差分析的标准姿势\n",
    "lm = ols('price ~ C(neighborhood) + C(style)', data=df).fit()\n",
    "anova_lm(lm)\n",
    "\n",
    "# Residual 行表示模型不能解释的组内的，其他的是能解释的组间的\n",
    "# df: 自由度（n-1）- 分类变量中的类别个数减1\n",
    "# sum_sq: 总平方和（SSM），residual行的 sum_eq: SSE\n",
    "# mean_sq: msm, residual行的 mean_sq: mse\n",
    "# F：F 统计量，查看卡方分布表即可\n",
    "# PR(>F): P 值\n",
    "\n",
    "# 反复刷新几次，发现都很显著，所以这两个变量也挺值得放入模型中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多元线性回归建模"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>          <td>price</td>      <th>  R-squared:         </th> <td>   0.680</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.678</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   421.6</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 01 Jul 2020</td> <th>  Prob (F-statistic):</th> <td>7.30e-147</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>01:56:25</td>     <th>  Log-Likelihood:    </th> <td> -8420.3</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   600</td>      <th>  AIC:               </th> <td>1.685e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   596</td>      <th>  BIC:               </th> <td>1.687e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     3</td>      <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th> <td> 5619.4288</td> <td> 3.24e+04</td> <td>    0.174</td> <td> 0.862</td> <td> -5.8e+04</td> <td> 6.92e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>area</th>      <td>  314.1341</td> <td>   21.622</td> <td>   14.528</td> <td> 0.000</td> <td>  271.669</td> <td>  356.599</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>bedrooms</th>  <td>-5248.5264</td> <td> 3.21e+04</td> <td>   -0.163</td> <td> 0.870</td> <td>-6.83e+04</td> <td> 5.78e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>bathrooms</th> <td> 4.164e+04</td> <td> 4.64e+04</td> <td>    0.898</td> <td> 0.370</td> <td>-4.95e+04</td> <td> 1.33e+05</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>41.088</td> <th>  Durbin-Watson:     </th> <td>   2.118</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  48.489</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 0.694</td> <th>  Prob(JB):          </th> <td>2.96e-11</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 2.879</td> <th>  Cond. No.          </th> <td>1.19e+04</td>\n",
       "</tr>\n",
       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.19e+04. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                  price   R-squared:                       0.680\n",
       "Model:                            OLS   Adj. R-squared:                  0.678\n",
       "Method:                 Least Squares   F-statistic:                     421.6\n",
       "Date:                Wed, 01 Jul 2020   Prob (F-statistic):          7.30e-147\n",
       "Time:                        01:56:25   Log-Likelihood:                -8420.3\n",
       "No. Observations:                 600   AIC:                         1.685e+04\n",
       "Df Residuals:                     596   BIC:                         1.687e+04\n",
       "Df Model:                           3                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept   5619.4288   3.24e+04      0.174      0.862    -5.8e+04    6.92e+04\n",
       "area         314.1341     21.622     14.528      0.000     271.669     356.599\n",
       "bedrooms   -5248.5264   3.21e+04     -0.163      0.870   -6.83e+04    5.78e+04\n",
       "bathrooms   4.164e+04   4.64e+04      0.898      0.370   -4.95e+04    1.33e+05\n",
       "==============================================================================\n",
       "Omnibus:                       41.088   Durbin-Watson:                   2.118\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               48.489\n",
       "Skew:                           0.694   Prob(JB):                     2.96e-11\n",
       "Kurtosis:                       2.879   Cond. No.                     1.19e+04\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 1.19e+04. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from statsmodels.formula.api import ols\n",
    "\n",
    "lm = ols('price ~ area + bedrooms + bathrooms', data=df).fit()\n",
    "lm.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模型优化\n",
    "发现精度还不够高，这里通过添加虚拟变量与使用方差膨胀因子检测多元共线性的方式来提升模型精度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2253</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A  B\n",
       "2253  1  0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 设置虚拟变量\n",
    "# 以名义变量 neighborhood 街区为例\n",
    "nominal_data = df['neighborhood']\n",
    "\n",
    "# 设置虚拟变量\n",
    "dummies = pd.get_dummies(nominal_data)\n",
    "dummies.sample()  # pandas 会自动帮你命名\n",
    "\n",
    "# 每个名义变量生成的虚拟变量中，需要各丢弃一个，这里以丢弃C为例\n",
    "dummies.drop(columns=['C'], inplace=True)\n",
    "dummies.sample()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>house_id</th>\n",
       "      <th>neighborhood</th>\n",
       "      <th>area</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>style</th>\n",
       "      <th>price</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1797</th>\n",
       "      <td>5306</td>\n",
       "      <td>A</td>\n",
       "      <td>812</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>lodge</td>\n",
       "      <td>213459</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3725</th>\n",
       "      <td>4370</td>\n",
       "      <td>C</td>\n",
       "      <td>1770</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>ranch</td>\n",
       "      <td>451515</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3246</th>\n",
       "      <td>1033</td>\n",
       "      <td>A</td>\n",
       "      <td>3035</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>victorian</td>\n",
       "      <td>765539</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      house_id neighborhood  area  bedrooms  bathrooms      style   price  A  \\\n",
       "1797      5306            A   812         0          0      lodge  213459  1   \n",
       "3725      4370            C  1770         4          2      ranch  451515  0   \n",
       "3246      1033            A  3035         6          4  victorian  765539  1   \n",
       "\n",
       "      B  \n",
       "1797  0  \n",
       "3725  0  \n",
       "3246  0  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 将结果与原数据集拼接\n",
    "results = pd.concat(objs=[df, dummies], axis='columns')  # 按照列来合并\n",
    "results.sample(3)\n",
    "# 对名义变量 style 的处理可自行尝试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>          <td>price</td>      <th>  R-squared:         </th> <td>   0.917</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.916</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   1310.</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 01 Jul 2020</td> <th>  Prob (F-statistic):</th> <td>4.94e-318</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>01:56:58</td>     <th>  Log-Likelihood:    </th> <td> -8015.6</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   600</td>      <th>  AIC:               </th> <td>1.604e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   594</td>      <th>  BIC:               </th> <td>1.607e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     5</td>      <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th> <td>-1.989e+05</td> <td> 1.91e+04</td> <td>  -10.409</td> <td> 0.000</td> <td>-2.36e+05</td> <td>-1.61e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>area</th>      <td>  347.3181</td> <td>   11.110</td> <td>   31.261</td> <td> 0.000</td> <td>  325.498</td> <td>  369.138</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>bedrooms</th>  <td> 1.114e+04</td> <td> 1.64e+04</td> <td>    0.679</td> <td> 0.497</td> <td>-2.11e+04</td> <td> 4.33e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>bathrooms</th> <td> -1.65e+04</td> <td> 2.37e+04</td> <td>   -0.696</td> <td> 0.487</td> <td>-6.31e+04</td> <td> 3.01e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>A</th>         <td>-2.535e+04</td> <td> 1.63e+04</td> <td>   -1.556</td> <td> 0.120</td> <td>-5.73e+04</td> <td> 6641.583</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>B</th>         <td> 5.182e+05</td> <td> 1.55e+04</td> <td>   33.535</td> <td> 0.000</td> <td> 4.88e+05</td> <td> 5.49e+05</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>32.404</td> <th>  Durbin-Watson:     </th> <td>   1.960</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  44.693</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 0.457</td> <th>  Prob(JB):          </th> <td>1.97e-10</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 3.976</td> <th>  Cond. No.          </th> <td>1.21e+04</td>\n",
       "</tr>\n",
       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.21e+04. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                  price   R-squared:                       0.917\n",
       "Model:                            OLS   Adj. R-squared:                  0.916\n",
       "Method:                 Least Squares   F-statistic:                     1310.\n",
       "Date:                Wed, 01 Jul 2020   Prob (F-statistic):          4.94e-318\n",
       "Time:                        01:56:58   Log-Likelihood:                -8015.6\n",
       "No. Observations:                 600   AIC:                         1.604e+04\n",
       "Df Residuals:                     594   BIC:                         1.607e+04\n",
       "Df Model:                           5                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept  -1.989e+05   1.91e+04    -10.409      0.000   -2.36e+05   -1.61e+05\n",
       "area         347.3181     11.110     31.261      0.000     325.498     369.138\n",
       "bedrooms    1.114e+04   1.64e+04      0.679      0.497   -2.11e+04    4.33e+04\n",
       "bathrooms   -1.65e+04   2.37e+04     -0.696      0.487   -6.31e+04    3.01e+04\n",
       "A          -2.535e+04   1.63e+04     -1.556      0.120   -5.73e+04    6641.583\n",
       "B           5.182e+05   1.55e+04     33.535      0.000    4.88e+05    5.49e+05\n",
       "==============================================================================\n",
       "Omnibus:                       32.404   Durbin-Watson:                   1.960\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               44.693\n",
       "Skew:                           0.457   Prob(JB):                     1.97e-10\n",
       "Kurtosis:                       3.976   Cond. No.                     1.21e+04\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 1.21e+04. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 再次建模\n",
    "lm = ols('price ~ area + bedrooms + bathrooms + A + B', data=results).fit()\n",
    "lm.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 模型末尾提示可能存在多元共线性，需要处理一下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 自定义方差膨胀因子的检测公式\n",
    "def vif(df, col_i):\n",
    "    \"\"\"\n",
    "    df: 整份数据\n",
    "    col_i：被检测的列名\n",
    "    \"\"\"\n",
    "    cols = list(df.columns)\n",
    "    cols.remove(col_i)\n",
    "    cols_noti = cols\n",
    "    formula = col_i + '~' + '+'.join(cols_noti)\n",
    "    r2 = ols(formula, df).fit().rsquared\n",
    "    return 1. / (1. - r2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "area \t 5.107868214175136\n",
      "bedrooms \t 21.10313675002999\n",
      "bathrooms \t 20.40342309177548\n",
      "A \t 1.4543177529320659\n",
      "B \t 1.4411499419523033\n"
     ]
    }
   ],
   "source": [
    "test_data = results[['area', 'bedrooms', 'bathrooms', 'A', 'B']]\n",
    "for i in test_data.columns:\n",
    "    print(i, '\\t', vif(df=test_data, col_i=i))\n",
    "# 发现 bedrooms 和 bathrooms 存在强相关性，可能这两个变量是解释同一个问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>          <td>price</td>      <th>  R-squared:         </th> <td>   0.917</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.916</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   1639.</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Wed, 01 Jul 2020</td> <th>  Prob (F-statistic):</th> <td>1.44e-319</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>01:59:14</td>     <th>  Log-Likelihood:    </th> <td> -8015.9</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   600</td>      <th>  AIC:               </th> <td>1.604e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   595</td>      <th>  BIC:               </th> <td>1.606e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     4</td>      <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th> <td>-1.919e+05</td> <td> 1.61e+04</td> <td>  -11.919</td> <td> 0.000</td> <td>-2.23e+05</td> <td> -1.6e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>area</th>      <td>  349.2697</td> <td>   10.727</td> <td>   32.559</td> <td> 0.000</td> <td>  328.202</td> <td>  370.338</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>bathrooms</th> <td>-2388.5200</td> <td> 1.14e+04</td> <td>   -0.209</td> <td> 0.835</td> <td>-2.48e+04</td> <td> 2.01e+04</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>A</th>         <td>-2.538e+04</td> <td> 1.63e+04</td> <td>   -1.559</td> <td> 0.120</td> <td>-5.74e+04</td> <td> 6593.472</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>B</th>         <td> 5.179e+05</td> <td> 1.54e+04</td> <td>   33.543</td> <td> 0.000</td> <td> 4.88e+05</td> <td> 5.48e+05</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>30.175</td> <th>  Durbin-Watson:     </th> <td>   1.960</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  40.986</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 0.438</td> <th>  Prob(JB):          </th> <td>1.26e-09</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 3.934</td> <th>  Cond. No.          </th> <td>9.20e+03</td>\n",
       "</tr>\n",
       "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 9.2e+03. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                  price   R-squared:                       0.917\n",
       "Model:                            OLS   Adj. R-squared:                  0.916\n",
       "Method:                 Least Squares   F-statistic:                     1639.\n",
       "Date:                Wed, 01 Jul 2020   Prob (F-statistic):          1.44e-319\n",
       "Time:                        01:59:14   Log-Likelihood:                -8015.9\n",
       "No. Observations:                 600   AIC:                         1.604e+04\n",
       "Df Residuals:                     595   BIC:                         1.606e+04\n",
       "Df Model:                           4                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept  -1.919e+05   1.61e+04    -11.919      0.000   -2.23e+05    -1.6e+05\n",
       "area         349.2697     10.727     32.559      0.000     328.202     370.338\n",
       "bathrooms  -2388.5200   1.14e+04     -0.209      0.835   -2.48e+04    2.01e+04\n",
       "A          -2.538e+04   1.63e+04     -1.559      0.120   -5.74e+04    6593.472\n",
       "B           5.179e+05   1.54e+04     33.543      0.000    4.88e+05    5.48e+05\n",
       "==============================================================================\n",
       "Omnibus:                       30.175   Durbin-Watson:                   1.960\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               40.986\n",
       "Skew:                           0.438   Prob(JB):                     1.26e-09\n",
       "Kurtosis:                       3.934   Cond. No.                     9.20e+03\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 9.2e+03. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 果然，bedrooms 和 bathrooms 这两个变量的方差膨胀因子较高，\n",
    " # 也印证了方差膨胀因子大多成对出现的原则，这里我们丢弃膨胀因子较大的 bedrooms 即可\n",
    "lm = ols(formula='price ~ area + bathrooms + A + B', data=results).fit()\n",
    "lm.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "area \t 4.6919801219379025\n",
      "bathrooms \t 4.6919801219379025\n"
     ]
    }
   ],
   "source": [
    "# 再次进行多元共线性检测\n",
    "test_data = df[['area', 'bathrooms']]\n",
    "for i in test_data.columns:\n",
    "    print(i, '\\t', vif(df=test_data, col_i=i))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 精度没变，但具体问题还是需要结合具体业务来分析"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
